
## Initial installation of special packages

# Note: This is how I initially downloaded the regime data. 
#For replication purposes, simply load the data as provided in the data folder 
#of the replication files. 

# remotes::install_github("xmarquez/democracyData")
# devtools::install_github("vdeminstitute/vdemdata")
# 
# library(vdem)
# library(democracyData)


## Load required packages
pacman::p_load(tidyverse, fastDummies)

# democracyData is installed from GitHub. Only install it when it is not
# already available, so that re-running the script does not require network
# access and does not replace an already installed version.
if (!requireNamespace("democracyData", quietly = TRUE)) {
  remotes::install_github("xmarquez/democracyData")
}
library(democracyData)



## Helper: check whether a set of columns uniquely identifies the rows of x.
##
## x    a data frame
## ...  the identifier columns, passed on to select() and arrange()
##
## Returns TRUE when every combination of the identifier columns occurs only
## once. Otherwise returns the rows of x whose identifier combination occurs
## more than once, sorted by the identifier columns.
unique_id <- function(x, ...) {
  id_cols <- select(x, ...)

  if (nrow(distinct(id_cols)) == nrow(id_cols)) {
    return(TRUE)
  }

  # Identifier combinations that are repeated, each listed once.
  repeated_ids <- distinct(filter(id_cols, duplicated(id_cols)))

  # Natural join on the shared identifier columns; the "Joining with ..."
  # message is silenced.
  suppressMessages(
    arrange(inner_join(repeated_ids, x), ...)
  )
}


## Skip commented code for replication purposes, this is just for documentation
## of how the data was initially downloaded. 

# ## load datasets using democracy data
# lied <- LIED
# vdem <- vdemdata::vdem
# fh <- download_fh()
# magaloni <- magaloni
# polity <- polityIV
# anckar <- anckar
# htw <- wahman_teorell_hadenius
# 
# 
# 
# 
# ##save datasets
# saveRDS(vdem, file = "Data/Regimedata/vdem13.rds")
# saveRDS(lied, file = "Data/Regimedata/lied6_4.rds")
# saveRDS(fh, file = "Data/Regimedata/fh_23.rds")
# saveRDS(magaloni, file = "Data/Regimedata/magaloni_awd1_0.rds")
# saveRDS(polity, file = "Data/Regimedata/polity_v.rds")
# saveRDS(htw, file = "Data/Regimedata/htw_ARD_6.rds")
# saveRDS(anckar, file = "Data/Regimedata/AF_1.rds")
# 
# 
# 


## Load data
## One stored .rds file per regime dataset (see the commented download code
## above for the object each file was saved from).

lied     <- readRDS(file = "Data/Regimedata/lied6_4.rds")
vdem     <- readRDS(file = "Data/Regimedata/vdem13.rds")
fh       <- readRDS(file = "Data/Regimedata/fh_23.rds")
magaloni <- readRDS(file = "Data/Regimedata/magaloni_awd1_0.rds")
polity   <- readRDS(file = "Data/Regimedata/polity_v.rds")
anckar   <- readRDS(file = "Data/Regimedata/AF_1.rds")
htw      <- readRDS(file = "Data/Regimedata/htw_ARD_6.rds")


### Prepare data
## This part creates dummies for the three categories PDA, Other Autocracy, and Democracy
# based on the classification in Table 1. It also creates a country-code system combining
# the COW, Gleditsch and Ward (GW), and Polity country codes to enable as many successful
# merges as possible. The COW code is the base; where it is missing, the GW code is used,
# and where that is also missing, the Polity code is used.



# VDem: builds the RoW data frame from v2x_regime and then removes vdem.

# Keep 1900 onwards and attach the country identifiers returned by
# democracyData::country_year_coder().
vdem <- vdem %>%
  filter(year >= 1900) %>%
  country_year_coder(
    country_name,
    year,
    match_type = "country",
    verbose = FALSE,
    include_in_output = c(
      "extended_country_name",
      "GWn", "cown",
      "polity_ccode",
      "in_GW_system",
      "in_cow_system",
      "in_polity_system",
      "polity_startdate",
      "polity_enddate"
    )
  )

# Diagnostics (printed only, nothing is assigned):
# number of missing values per identifier column
vapply(
  vdem %>% select(COWcode, cown, GWn, polity_ccode, country_id, country_name),
  function(x) sum(is.na(x)),
  integer(1)
)

# country-years where V-Dem's own COWcode differs from the matched cown
vdem %>%
  filter(COWcode != cown) %>%
  select(country_name, year)

# duplicated extended_country_name-years among rows with a cown
vdem %>%
  select(cown, year, country_name, extended_country_name) %>%
  filter(!is.na(cown)) %>%
  unique_id(extended_country_name, year)

# rows without a matched cown
vdem %>%
  filter(is.na(cown)) %>%
  select(cown, COWcode)

# Final_Code: cown first, then GWn, then polity_ccode.
vdem <- vdem %>%
  mutate(Final_Code = coalesce(cown, GWn, polity_ccode))

# Check for duplicated Final_Code-years, keep the first row of each, re-check.
vdem %>%
  select(cown, year, country_name, extended_country_name, Final_Code) %>%
  filter(!is.na(cown)) %>%
  unique_id(Final_Code, year)

vdem <- vdem %>% distinct(Final_Code, year, .keep_all = TRUE)

vdem %>%
  select(cown, year, country_name, extended_country_name, Final_Code) %>%
  filter(!is.na(cown)) %>%
  unique_id(Final_Code, year)

# Three-category regime type from v2x_regime: 0 -> CA, 1 -> EA, above 1 -> DEM
# (numeric version: 0 / 1 / 2). Missing v2x_regime stays NA.
RoW <- vdem %>%
  select(
    year, Final_Code, cown, GWn, polity_ccode,
    country_id_vdem = country_id, v2x_regime, extended_country_name
  ) %>%
  mutate(
    RegType_RoW = case_when(
      v2x_regime == 0 ~ "CA",
      v2x_regime == 1 ~ "EA",
      v2x_regime > 1 ~ "DEM"
    ),
    RegType_RoW_n = case_when(
      v2x_regime == 0 ~ 0,
      v2x_regime == 1 ~ 1,
      v2x_regime > 1 ~ 2
    )
  )

RoW <- dummy_cols(
  RoW,
  select_columns = "RegType_RoW",
  remove_first_dummy = FALSE,
  ignore_na = TRUE
)
names(RoW)

# Drop the individual code columns. They must be negated together:
# select(!cown, !GWn, !polity_ccode) takes the union of the three selections
# and therefore keeps every column.
RoW <- RoW %>% select(!c(cown, GWn, polity_ccode))
rm(vdem)
####
# LIED: three-category regime type from lexical_index.
names(lied)

# Keep one row per lied_country-year.
lied <- lied %>% distinct(lied_country, year, .keep_all = TRUE)

# Diagnostic: number of missing values per column.
vapply(lied, function(x) sum(is.na(x)), integer(1))

# Add polity_ccode; cown and GWn are taken from the columns already in lied.
lied <- lied %>%
  country_year_coder(
    lied_country,
    year,
    match_type = "country",
    verbose = FALSE,
    include_in_output = c("polity_ccode")
  )

# Final_Code: cown first, then GWn, then polity_ccode.
lied <- lied %>%
  mutate(Final_Code = coalesce(cown, GWn, polity_ccode))

# Diagnostic: rows with neither a Final_Code nor a value in lied$vdem.
# This check has to run after Final_Code exists; evaluated before that point
# it refers to a missing column and cannot count anything.
# NOTE(review): lied$vdem is presumably LIED's V-Dem country id - confirm.
sum(is.na(lied$Final_Code) & is.na(lied$vdem))

lied <- lied %>% distinct(Final_Code, year, .keep_all = TRUE)

# lied <- lied %>% mutate(Final_Code = coalesce(Final_Code, lied_cow))

# Diagnostics: duplicated country-years among rows with a Final_Code.
lied %>%
  filter(!is.na(Final_Code)) %>%
  unique_id(Final_Code, year)
lied %>%
  filter(!is.na(Final_Code)) %>%
  unique_id(extended_country_name, year)

# Rows without any country code cannot be merged and are dropped.
lied <- lied %>% filter(!is.na(Final_Code))

# lexical_index below 3 -> CA, exactly 3 -> EA, above 3 -> DEM
# (numeric version: 0 / 1 / 2). Missing lexical_index stays NA.
lied <- lied %>%
  mutate(
    RegType_lied = case_when(
      lexical_index < 3 ~ "CA",
      lexical_index == 3 ~ "EA",
      lexical_index > 3 ~ "DEM"
    ),
    RegType_lied_n = case_when(
      lexical_index < 3 ~ 0,
      lexical_index == 3 ~ 1,
      lexical_index > 3 ~ 2
    )
  )

lied <- lied %>%
  select(
    year, Final_Code, RegType_lied, lexical_index, RegType_lied_n,
    extended_country_name, country_name_lied = lied_country
  )

lied <- dummy_cols(
  lied,
  select_columns = "RegType_lied",
  remove_first_dummy = FALSE,
  ignore_na = TRUE
)

names(lied)
###


# Prep FH
# Diagnostic: number of missing values in the identifier columns.
vapply(
  select(fh, cown, GWn, extended_country_name),
  function(col) sum(is.na(col)),
  integer(1)
)

# Keep year, status (renamed to status_fh) and cown.
fh <- select(fh, year, status_fh = status, cown)
names(fh)

# Numeric version of the status: NF -> 0, PF -> 1, F -> 2.
unique(fh$status_fh)
fh <- mutate(
  fh,
  fh_status_n = case_when(
    status_fh == "NF" ~ 0,
    status_fh == "PF" ~ 1,
    status_fh == "F" ~ 2
  )
)

fh <- dummy_cols(
  fh,
  select_columns = "status_fh",
  remove_first_dummy = FALSE,
  ignore_na = TRUE
)
unique_id(fh, year, cown)

# cown is used directly as the merge key for this dataset.
names(fh)
fh <- rename(fh, Final_Code = cown)

unique_id(fh, Final_Code, year)


## magaloni

# Diagnostic: number of missing values per column.
vapply(magaloni, function(col) sum(is.na(col)), integer(1))

# Final_Code: cown first, then GWn; keep the first row per country-year.
magaloni <- magaloni %>%
  mutate(Final_Code = coalesce(cown, GWn)) %>%
  distinct(Final_Code, year, .keep_all = TRUE)

unique_id(magaloni, year, Final_Code)

# regime_nr: Military / Single Party / Monarchy -> CA, Multiparty -> EA,
# Democracy -> DEM (numeric version: 0 / 1 / 2). Other values stay NA.
magaloni <- magaloni %>%
  mutate(
    RegType_magaloni = case_when(
      regime_nr %in% c("Military", "Single Party", "Monarchy") ~ "CA",
      regime_nr == "Multiparty" ~ "EA",
      regime_nr == "Democracy" ~ "DEM"
    ),
    RegType_magaloni_n = case_when(
      regime_nr %in% c("Military", "Single Party", "Monarchy") ~ 0,
      regime_nr == "Multiparty" ~ 1,
      regime_nr == "Democracy" ~ 2
    )
  )

magaloni <- dummy_cols(
  magaloni,
  select_columns = "RegType_magaloni",
  remove_first_dummy = FALSE,
  ignore_na = TRUE
)
names(magaloni)

magaloni <- magaloni %>%
  select(
    Final_Code, year,
    RegType_magaloni, RegType_magaloni_n,
    RegType_magaloni_DEM, RegType_magaloni_CA, RegType_magaloni_EA,
    extended_country_name
  )


###
# htw: three-category regime type from regime1ny.
# Diagnostic: number of missing values per column.
vapply(htw, function(col) sum(is.na(col)), integer(1))

# Final_Code: cown first, then GWn. Rows without regimeny are dropped.
htw <- htw %>%
  mutate(Final_Code = coalesce(cown, GWn)) %>%
  filter(!is.na(regimeny))

unique_id(htw, Final_Code, year)

# Rows without regime1ny are dropped as well; then keep the first row per
# country-year.
htw <- htw %>%
  filter(!is.na(regime1ny)) %>%
  distinct(Final_Code, year, .keep_all = TRUE)

# regime1ny: 100 -> Democracy, 4 -> MP_Autocracy, anything else ->
# Non_MP_Autocracy (numeric version: 2 / 1 / 0).
htw <- htw %>%
  mutate(
    HTW_RegType = case_when(
      regime1ny == 100 ~ "Democracy",
      regime1ny == 4 ~ "MP_Autocracy",
      regime1ny != 4 & regime1ny != 100 ~ "Non_MP_Autocracy"
    ),
    HTW_RegType_n = case_when(
      regime1ny == 100 ~ 2,
      regime1ny == 4 ~ 1,
      regime1ny != 4 & regime1ny != 100 ~ 0
    )
  )

# Cross-tabulate the original codes against the new categories.
table(htw$regime1ny, htw$HTW_RegType)

htw <- htw %>%
  select(
    year, Final_Code, HTW_RegType, HTW_RegType_n,
    regime1ny_htw = regime1ny, extended_country_name
  )

htw <- dummy_cols(
  htw,
  select_columns = "HTW_RegType",
  remove_first_dummy = FALSE,
  ignore_na = TRUE
)
unique_id(htw, year, Final_Code)

###anckar

# anckar: three-category regime type from democracy and regimenarrowcat.
# Diagnostic: number of missing values per column.
vapply(anckar, function(col) sum(is.na(col)), integer(1))

# cown is used directly as the merge key for this dataset.
anckar <- anckar %>% mutate(Final_Code = cown)

table(anckar$regimenarrowcat)
table(anckar$democracy)

# democracy == 1 -> Democracy; otherwise "Multi-party authoritarian rule" ->
# MP_Autocracy; all remaining non-democracies -> Non_MP_Autocracy
# (numeric version: 2 / 1 / 0).
anckar <- anckar %>%
  mutate(
    AnckarRegtype = case_when(
      democracy == 1 ~ "Democracy",
      regimenarrowcat == "Multi-party authoritarian rule" ~ "MP_Autocracy",
      democracy != 1 &
        regimenarrowcat != "Multi-party authoritarian rule" ~ "Non_MP_Autocracy"
    ),
    AnckarRegtype_n = case_when(
      democracy == 1 ~ 2,
      regimenarrowcat == "Multi-party authoritarian rule" ~ 1,
      democracy != 1 &
        regimenarrowcat != "Multi-party authoritarian rule" ~ 0
    )
  )

table(anckar$AnckarRegtype)

# Keep the first row per country-year and verify uniqueness.
anckar <- anckar %>% distinct(Final_Code, year, .keep_all = TRUE)
unique_id(anckar, year, Final_Code)

anckar <- anckar %>%
  select(
    Final_Code, year, AnckarRegtype, AnckarRegtype_n, regimenarrowcat,
    democracy_anckar = democracy, extended_country_name
  )
anckar <- dummy_cols(
  anckar,
  select_columns = "AnckarRegtype",
  remove_first_dummy = FALSE,
  ignore_na = TRUE
)
names(anckar)
##

# polity: three-category regime type from the polity score.
# Diagnostic: number of missing values per column.
vapply(polity, function(col) sum(is.na(col)), integer(1))

# Final_Code: cown first, then GWn.
polity <- polity %>% mutate(Final_Code = coalesce(cown, GWn))

# Check for duplicated country-years, then keep the first row of each.
unique_id(polity, year, Final_Code)

polity <- polity %>%
  distinct(Final_Code, year, .keep_all = TRUE) %>%
  select(year, Final_Code, polity, extended_country_name)

# polity score: -10..-6 -> Autocracy, 6..10 -> Democracy, -5..5 and the
# special codes -66 / -77 / -88 -> Anocracy (numeric version: 0 / 2 / 1).
polity <- polity %>%
  mutate(
    Politytype = case_when(
      polity %in% -10:-6 ~ "Autocracy",
      polity %in% 6:10 ~ "Democracy",
      polity %in% c(-5:5, -66, -77, -88) ~ "Anocracy"
    ),
    Politytype_n = case_when(
      polity %in% -10:-6 ~ 0,
      polity %in% 6:10 ~ 2,
      polity %in% c(-5:5, -66, -77, -88) ~ 1
    )
  )

# Cross-tabulate the original scores against the new categories.
table(polity$polity, polity$Politytype)

polity <- dummy_cols(
  polity,
  select_columns = "Politytype",
  remove_first_dummy = FALSE,
  ignore_na = TRUE
)

unique_id(polity, year, Final_Code)




# Prep Miller

# The .RData file is read into its own environment so that loading it cannot
# overwrite objects in the workspace. The data frame is stored in the file
# under the name `x`; get() stops with an error if that object is missing.
miller_env <- new.env()
load("Data/Regimedata/Miller_Strategic_Origins_Data.RData", envir = miller_env)
miller <- get("x", envir = miller_env, inherits = FALSE)
rm(miller_env)
names(miller)

bmr <- read_csv("Data/Regimedata/BMR_demcoding.csv")

# Diagnostic: which Miller country-years have no counterpart in BMR?
bmr$test_id <- paste0(bmr$ccode, bmr$year)
miller$test_id <- paste0(miller$ccode, miller$year)

# number of distinct Miller ids without a match in BMR
sum(!(unique(miller$test_id) %in% bmr$test_id))

# The row mask is computed per row of miller, so it stays aligned with the
# rows even if an id occurs more than once.
miss <- miller[!(miller$test_id %in% bmr$test_id), ]

# Take the country name from BMR and use it to look up the country codes.
bmr <- bmr %>% select(ccode, year, country)
miller <- left_join(miller, bmr, by = c("year", "ccode"))
miller <- miller %>%
  country_year_coder(
    country,
    year,
    match_type = "country",
    verbose = FALSE,
    include_in_output = c(
      "extended_country_name",
      "GWn", "cown",
      "polity_ccode",
      "in_GW_system",
      "in_cow_system",
      "in_polity_system",
      "polity_startdate",
      "polity_enddate"
    )
  )

# Diagnostics: missing values per column, and rows without a matched cown.
vapply(miller, function(col) sum(is.na(col)), integer(1))

miller %>%
  filter(is.na(cown)) %>%
  select(extended_country_name, year, ccode, country)

# Final_Code: cown first, then GWn, then Miller's own ccode.
miller <- miller %>%
  mutate(Final_Code = coalesce(cown, GWn, ccode))

# Check for duplicated country-years, then keep the first row of each.
miller %>% unique_id(year, Final_Code)
miller <- miller %>% distinct(Final_Code, year, .keep_all = TRUE)

# rt_x: 1 -> CA, 3 -> EA, 2 -> DEM (numeric version: 0 / 1 / 2).
miller <- miller %>%
  mutate(
    RegType_miller = case_when(
      rt_x == 1 ~ "CA",
      rt_x == 3 ~ "EA",
      rt_x == 2 ~ "DEM"
    ),
    RegType_miller_n = case_when(
      rt_x == 1 ~ 0,
      rt_x == 3 ~ 1,
      rt_x == 2 ~ 2
    )
  )

# Keep the merge keys and regime variables. The existing d_rtx1 / d_rtx2 /
# d_rtx3 columns are renamed to the CA / DEM / EA dummies, and rows without a
# regime type are dropped.
miller <- miller %>%
  select(
    year, RegType_miller, RegType_miller_n, Final_Code,
    rtx_miller = rt_x,
    RegType_miller_CA = d_rtx1,
    RegType_miller_DEM = d_rtx2,
    RegType_miller_EA = d_rtx3,
    ccode_miller = ccode
  ) %>%
  filter(!is.na(RegType_miller))


# Merge all prepared measures on country-year and save the result.
mergecols <- c("year", "Final_Code")

# Successive full joins, so a country-year is kept if it appears in any source.
# NOTE(review): dplyr joins match NA keys to each other by default. Rows with
# a missing Final_Code are removed explicitly only for lied - confirm that the
# other sources contain none, otherwise those rows are cross-matched here.
df <- lied %>%
  full_join(RoW, by = mergecols) %>%
  full_join(miller, by = mergecols) %>%
  full_join(htw, by = mergecols) %>%
  full_join(magaloni, by = mergecols) %>%
  full_join(anckar, by = mergecols) %>%
  full_join(polity, by = mergecols) %>%
  full_join(fh, by = mergecols)

# Diagnostic: TRUE if year + Final_Code uniquely identify the merged rows.
df %>% unique_id(year, Final_Code)

# Several inputs carry an extended_country_name column, so the joins create
# suffixed copies (.x, .y, .x.x, ...). Drop them by prefix rather than by the
# exact suffixes, which depend on the join order and on which inputs carry
# the column.
df <- df %>% select(!starts_with("extended_country_name"))
names(df)

saveRDS(df, file = "Data/Regimedata/Measures_merged.rds")




